'''
Author: 奔跑的乌龟
email: 435327238@qq.com
'''
from DbController import DbController
from Request import Request
from lxml import etree
import log
import sys
import html
from Chapter import Chapter

class Book:
    """Scrape a single book page and store the book together with its chapters.

    Constructing an instance performs the whole job: ``start`` downloads and
    parses the book page, ``parseData`` extracts the metadata and the chapters,
    and ``insertBookData`` writes everything to the database.
    """

    # Base URL; __init__ appends the book id to it per instance.
    url = "http://www.aidusk.com/t/"

    # Parsed HTML document of the book page (set by start()).
    response_Text = ""

    # Chapter rows collected by parseData(). The attribute name is kept as-is
    # because callers may read it. This is only a class-level default: every
    # instance rebinds its own list in __init__ / parseData().
    crapter = []

    # Book title
    title = ""

    # Author
    author = ""

    # Synopsis
    synopsis = ""

    # Cover image
    photo = ""

    # Category name as scraped from the page; 0 until it has been scraped.
    type = 0

    # Id of the book being scraped
    book_id = ""

    # Category names; the position of a name in this list is the value stored
    # in the ``type`` column. Index 0 is an empty placeholder entry.
    classification = [
        "", "仙侠修真", "科幻未来", "恐怖惊悚", "西方奇幻", "玄幻异界",
        "神话王朝", "魔法幻情", "东方玄幻", "历史传奇", "国术武侠",
        "网络游戏", "谍战特工", "军事战争", "体育竞技", "官场职场",
        "浪漫言情", "青春校园", "宫廷贵族", "耽美言情", "都市生活",
        "穿越重生", "风雨同人", "短篇综合", "乡村布衣",
    ]

    def __init__(self, id):
        """Scrape and store the book identified by ``id``.

        Args:
            id: Book id; it is appended to the base URL to form the page URL.
        """
        self.book_id = id
        self.url = self.url + str(id)
        # Per-instance list so instances never share the class-level default.
        self.crapter = []
        self.start()
        self.parseData()
        print("爬取结束：" + self.url)

    def start(self):
        """Download the book page and keep its parsed HTML tree."""
        Req = Request(self.url)
        self.response_Text = etree.HTML(Req.get())

    def parseData(self):
        """Extract metadata, synopsis and chapters, then store them.

        A failure while reading the metadata is logged and the remaining steps
        still run. A failure while fetching a chapter is logged and stops the
        chapter loop; the chapters collected so far are kept.
        """
        page = self.response_Text

        try:
            # The fields are located by their position in the page markup.
            self.author = page.xpath('//span/text()')[1]
            self.title = page.xpath('//a/text()')[9]
            metaContents = page.xpath('//meta/@content')
            self.photo = metaContents[13]
            self.type = metaContents[14]
        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self._logError(e, "获取书籍信息失败!")

        # Use at most the first four intro paragraphs. Slicing tolerates pages
        # with fewer paragraphs, and assigning (instead of appending) keeps a
        # repeated parseData() call from duplicating the text.
        paragraphs = page.xpath('//div[@class="intro"]/p/text()')
        self.synopsis = "".join(paragraphs[:4])

        # Query the chapter links once instead of once per chapter, and end the
        # loop normally instead of relying on an IndexError that was logged as
        # a failure at the end of every scrape.
        hrefs = page.xpath('//div[@class="book_con_list"]/ul/li/a/@href')
        self.crapter = []
        for index, href in enumerate(hrefs):
            print("爬取章节" + str(index))
            try:
                reqUrl = self.url + "/" + href
                chapterData = Chapter(self.book_id, reqUrl).parseData()
            except Exception as e:
                self._logError(e, "获取章节数据失败")
                # Stop at the first failing chapter, as before, so the stored
                # chapters are always a contiguous prefix of the book.
                break
            self.crapter.append(chapterData)

        # Store the book and its chapters.
        self.insertBookData()

    def insertBookData(self):
        """Insert the book and its chapters inside one database transaction.

        Books with fewer than 10 collected chapters are skipped. The check runs
        before any row data is built, so a skipped book can no longer fail on
        an unrecognised category.
        """
        if len(self.crapter) < 10:
            print("章节数少于10,不爬取该书籍")
            return

        bookSql = "insert into novel_books(book_id, author, title, type, photo, synopsis) value(%s, %s, %s, %s, %s, %s)"
        bookData = [
            self.book_id,
            self.author,
            self.title,
            self._classificationIndex(),
            self.photo,
            self.synopsis,
        ]

        chapterSql = "insert into novel_chapter(book_id, chaptertitle, content) values(%s, %s, %s)"

        Db = DbController()
        # NOTE(review): if an insert raises, transaction_end() is never reached.
        # DbController's rollback API is not visible here - confirm how an
        # aborted transaction should be cleaned up.
        Db.transaction_start()

        Db.insert(bookSql, bookData)    # book row
        Db.inserts(chapterSql, self.crapter)    # chapter rows

        Db.transaction_end()

    def _classificationIndex(self):
        """Return the position of ``self.type`` in ``classification``.

        Returns 0 (the empty placeholder entry) when the category is unknown or
        was never scraped, instead of raising ``ValueError``.
        """
        try:
            return self.classification.index(self.type)
        except ValueError:
            return 0

    def _logError(self, exc, message):
        """Write ``exc`` to the project log with its file and line number.

        Args:
            exc: The exception that was caught.
            message: Human-readable description passed through to the log.
        """
        tb = exc.__traceback__
        # '__file__' is missing from the globals of interactive sessions.
        errFile = tb.tb_frame.f_globals.get('__file__', '<unknown>')
        errLine = tb.tb_lineno
        log.writeLog(errFile, errLine, type(exc), exc, message)